We will clean the environment, setup the locations, define colors, and create a datestamp.
Clean the environment.
Set locations and working directories…
Create a new analysis directories.
- general directory
- for plots
- for output of summary results
- for baseline tables
- for genetic analyses
- for Cox regression results
… a package-installation function …
source("scripts/functions.R")… and load those packages.
source("scripts/packages05.R")If prompted for GitHub credentials, enter your PAT in the password field
Using GITHUB_PAT from Sander W. van der Laan (credential helper: osxkeychain)
Using github PAT from envvar GITHUB_PAT
Skipping install of 'patchwork' from a github remote, the SHA1 (79223d30) has not changed since last install.
Use `force = TRUE` to force installation
Using github PAT from envvar GITHUB_PAT
Skipping install of 'RACER' from a github remote, the SHA1 (1394c9d4) has not changed since last install.
Use `force = TRUE` to force installation
We will create a datestamp and define the Utrecht Science Park Colour Scheme.
Today = format(as.Date(as.POSIXlt(Sys.time())), "%Y%m%d")
Today.Report = format(as.Date(as.POSIXlt(Sys.time())), "%A, %B %d, %Y")
source("scripts/colors.R")We will parse the data to create regional association plots for each of the 11 loci.
library("scales")
pal_npg("nrc")(10) [1] "#E64B35FF" "#4DBBD5FF" "#00A087FF" "#3C5488FF" "#F39B7FFF" "#8491B4FF" "#91D1C2FF" "#DC0000FF" "#7E6148FF"
[10] "#B09C85FF"
show_col(pal_npg("nrc")(10))
# show_col(pal_npg("nrc", alpha = 0.6)(10))We need to load the meta-analysis summary statistics from the European - African-American ancestry analysis first.
gwas_sumstats_racer_EA_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_complete_racer.EA_AA.rds"))We need to load the meta-analysis summary statistics from the European-only ancestry analysis.
# gwas_sumstats_EA <- fread(paste0(GWAS_loc,"/CAC1000G_EA_FINAL_FUMA.txt.gz"),
# showProgress = TRUE)
# names(gwas_sumstats_EA)[names(gwas_sumstats_EA) == "Pos"] <- "Position"
# saveRDS(gwas_sumstats_EA, file = paste0(OUT_loc, "/gwas_sumstats.EA.rds"))
#
# gwas_sumstats_racer_EA <- subset(gwas_sumstats_EA,
# select = c("MarkerName", "rsID", "Chr", "Position", "Pvalue"))
#
# saveRDS(gwas_sumstats_racer_EA, file = paste0(OUT_loc, "/gwas_sumstats_racer.EA.rds"))
# rm(gwas_sumstats_EA)
gwas_sumstats_racer_EA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_racer.EA.rds"))We need to load the meta-analysis summary statistics from the African-American-only ancestry analysis.
# gwas_sumstats_unfiltered_AA <- fread(paste0(GWAS_loc,"/CAC1000G_AA_FINAL_FUMA.unfiltered.txt.gz"),
# showProgress = TRUE)
# saveRDS(gwas_sumstats_unfiltered_AA, file = paste0(OUT_loc, "/gwas_sumstats_unfiltered.AA.rds"))
# gwas_sumstats_racer_unfiltered_AA <- subset(gwas_sumstats_unfiltered_AA,
# select = c("MarkerName", "rsID", "Chr", "Position", "Pvalue"))
#
# saveRDS(gwas_sumstats_racer_unfiltered_AA, file = paste0(OUT_loc, "/gwas_sumstats_unfiltered_racer.AA.rds"))
#
# gwas_sumstats_racer_unfiltered_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_unfiltered_racer.AA.rds"))
#
# gwas_sumstats_AA <- fread(paste0(GWAS_loc,"/CAC1000G_AA_FINAL_FUMA.filtered.txt.gz"),
# showProgress = TRUE)
# saveRDS(gwas_sumstats_AA, file = paste0(OUT_loc, "/gwas_sumstats.AA.rds"))
#
# gwas_sumstats_racer_AA <- subset(gwas_sumstats_AA,
# select = c("MarkerName", "rsID", "Chr", "Position", "Pvalue"))
#
# saveRDS(gwas_sumstats_racer_AA, file = paste0(OUT_loc, "/gwas_sumstats_racer.AA.rds"))
# rm(gwas_sumstats_AA)
gwas_sumstats_racer_AA <- readRDS(file = paste0(OUT_loc, "/gwas_sumstats_racer.AA.rds"))We are interested in 11 top loci. We will plot these using the EU-AA-ancestry data.
library(openxlsx)
variant_list <- read.xlsx(paste0(TARGET_loc, "/Variants.xlsx"), sheet = "TopLoci")
DT::datatable(variant_list)NALet’s do some plotting.
library(RACER)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER")),
dir.create(file.path(PROJECT_loc, "/RACER")),
FALSE)[1] FALSE
RACER_loc = paste0(PROJECT_loc,"/RACER")
variants_of_interest <- c(variant_list$rsID)
variants_of_interest_fewgenes <- c("rs9349379")#, "rs3844006", "rs2854746", "rs4977575", "rs9633535", "rs11063120", "rs9515203", "rs7182103")
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,17]
tempEND <- subset(variant_list, rsID == VARIANT)[,18]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_EA_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
temp_f_ld = RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19",
plotby = "snp", snp_plot = VARIANT,
label_lead = TRUE, gene_track_h = 2, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.png"), plot = last_plot())
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.pdf"), plot = last_plot())
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.eps"), plot = last_plot())
# rm(temp, p1,
# temp_f, temp_f_ld,
# tempCHR, tempSTART, tempEND,
# VARIANT, tempVARIANTnr)
}Getting data for rs9349379.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9349379...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9349379&pop=EUR&r2_d=r2&token=c0f613f149ab"
[45%] Downloaded 32210 bytes...
[100%] Downloaded 70967 bytes...
Merging input association data with LD...
Plotting region surrounding rs9349379 on 6:12403957-13403957.
Plotting by...
snp rs9349379
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs9349379.
Getting data for rs3844006.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs3844006...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs3844006&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 31016 bytes...
Merging input association data with LD...
Plotting region surrounding rs3844006 on 6:131595002-132595002.
Plotting by...
snp rs3844006
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs3844006.
Getting data for rs2854746.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs2854746...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs2854746&pop=EUR&r2_d=r2&token=c0f613f149ab"
[44%] Downloaded 49152 bytes...
[59%] Downloaded 65518 bytes...
[74%] Downloaded 81920 bytes...
[89%] Downloaded 98286 bytes...
[100%] Downloaded 109298 bytes...
Merging input association data with LD...
Plotting region surrounding rs2854746 on 7:45460645-46460645.
Plotting by...
snp rs2854746
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs2854746.
Getting data for rs4977575.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4977575...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4977575&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 26000 bytes...
Merging input association data with LD...
Plotting region surrounding rs4977575 on 9:21624744-22624744.
Plotting by...
snp rs4977575
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs4977575.
Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10899970...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=EUR&r2_d=r2&token=c0f613f149ab"
[35%] Downloaded 57344 bytes...
[56%] Downloaded 90112 bytes...
[100%] Downloaded 160476 bytes...
Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45334720.
Plotting by...
snp rs10899970
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs10899970.
Getting data for rs9633535.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9633535...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9633535&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 49363 bytes...
Merging input association data with LD...
Plotting region surrounding rs9633535 on 10:63336088-64336088.
Plotting by...
snp rs9633535
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs9633535.
Getting data for rs10762577.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10762577...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 92997 bytes...
Merging input association data with LD...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Plotting by...
snp rs10762577
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs10762577.
Getting data for rs11063120.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs11063120...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs11063120&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 41883 bytes...
Merging input association data with LD...
Plotting region surrounding rs11063120 on 12:3986618-4986618.
Plotting by...
snp rs11063120
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs11063120.
Getting data for rs9515203.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9515203...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9515203&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 30139 bytes...
Merging input association data with LD...
Plotting region surrounding rs9515203 on 13:110549623-111549623.
Plotting by...
snp rs9515203
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs9515203.
Getting data for rs7182103.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7182103...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7182103&pop=EUR&r2_d=r2&token=c0f613f149ab"
[27%] Downloaded 32209 bytes...
[76%] Downloaded 90112 bytes...
[100%] Downloaded 118299 bytes...
Merging input association data with LD...
Plotting region surrounding rs7182103 on 15:78623946-79623946.
Plotting by...
snp rs7182103
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs7182103.
Getting data for rs7412.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7412...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 38865 bytes...
Merging input association data with LD...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Plotting by...
snp rs7412
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs7412.
These are genetic loci with many genes.
variants_of_interest_manygenes <- c("rs7412", "rs10762577")
for(VARIANT in variants_of_interest_manygenes){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,17]
tempEND <- subset(variant_list, rsID == VARIANT)[,18]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_EA_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
temp_f_ld = RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19",
plotby = "snp", snp_plot = VARIANT,
label_lead = TRUE, gene_track_h = 0.75, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.png"), plot = last_plot())
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.pdf"), plot = last_plot())
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs7412.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7412...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=EUR&r2_d=r2&token=c0f613f149ab"
Merging input association data with LD...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Plotting by...
snp rs7412
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs7412.
Getting data for rs10762577.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10762577...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=EUR&r2_d=r2&token=c0f613f149ab"
[52%] Downloaded 48576 bytes...
[59%] Downloaded 55496 bytes...
[77%] Downloaded 71862 bytes...
[100%] Downloaded 92997 bytes...
Merging input association data with LD...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Plotting by...
snp rs10762577
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs10762577.
The CXCL12 genetic locus.
variants_of_interest_cxcl12 <- c("rs10899970")
for(VARIANT in variants_of_interest_cxcl12){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,17]
tempEND <- subset(variant_list, rsID == VARIANT)[,18]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_EA_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
temp_f_ld = RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f_ld,
chr = tempCHR, build = "hg19", set = "all",
plotby = "snp", snp_plot = VARIANT,
label_lead = TRUE, gene_track_h = 0.75, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.png"), plot = last_plot())
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.pdf"), plot = last_plot())
# ggsave(filename = paste0(RACER_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10899970...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=EUR&r2_d=r2&token=c0f613f149ab"
[45%] Downloaded 73710 bytes...
[86%] Downloaded 139246 bytes...
[100%] Downloaded 160476 bytes...
Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45334720.
Plotting by...
snp rs10899970
Reading in association data
Determining lead SNP
Generating Plot
Saving image for rs10899970.
We want to create some regional association plots to combine with teh UCSC browser tracks, thus we need the exact same regions.
library(openxlsx)
add_list <- read.xlsx(paste0(TARGET_loc, "/Variants.xlsx"), sheet = "AdditionalPlots")
DT::datatable(add_list)NAWe want to color the credible sets, which we load here.
credset <- as_tibble(fread(paste0(PROJECT_loc, "/CredibleSets/CAC_EUR_AFR_cred_set_all_loci_50kb.txt")))
credsetWe want to add the posterior probabilities and make a variable to color by.
gwas_sumstats_racer_credset <- merge(gwas_sumstats_racer_EA_AA,
credset %>% select(RSID, Posterior_Prob),
sort = FALSE,
by.x = "rsID", by.y = "RSID", all.x = TRUE) %>%
# mutate(., Posterior_Prob = ifelse(is.na(Posterior_Prob), 0, Posterior_Prob)) %>%
mutate(CredSet = case_when(Posterior_Prob > 0 ~ '95% credible set',
TRUE ~ 'not in credible set'))
head(gwas_sumstats_racer_credset)
table(gwas_sumstats_racer_credset$CredSet)
95% credible set not in credible set
103 8585944
summary(gwas_sumstats_racer_credset$Posterior_Prob) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0 0 0 0 0 1 8585944
library(RACER)
# library(plotly)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER")),
dir.create(file.path(PROJECT_loc, "/RACER")),
FALSE)[1] FALSE
RACER_loc = paste0(PROJECT_loc,"/RACER")
variants_of_interest <- c(add_list$rsID)
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(add_list, rsID == VARIANT)[,4]
tempSTART <- subset(add_list, rsID == VARIANT)[,5]
tempEND <- subset(add_list, rsID == VARIANT)[,6]
tempNAME <- subset(add_list, rsID == VARIANT)[,3]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_credset, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
cat("\nGetting LD data.\n")
# temp_f_ld = RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "EUR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f,
chr = tempCHR, build = "hg19",
plotby = "coord", snp_plot = VARIANT,
start_plot = tempSTART, end_plot = tempEND,
label_lead = FALSE,
grey_colors = FALSE,
cred_set = TRUE,
gene_track_h = 3, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
# ggsave(filename = paste0(RACER_loc, "/", tempNAME, ".", Today, ".",VARIANT,".",tempSTART,".",tempEND,".regional_assoc.png"), plot = p1)
# ggsave(filename = paste0(RACER_loc, "/", tempNAME, ".", Today, ".",VARIANT,".",tempSTART,".",tempEND,".regional_assoc.pdf"), plot = p1)
# ggsave(filename = paste0(RACER_loc, "/", tempNAME, ".", Today, ".",VARIANT,".",tempSTART,".",tempEND,".regional_assoc.eps"), plot = p1)
# print(ggplotly(p1))
rm(temp, p1,
temp_f,
tempCHR, tempSTART, tempEND,
VARIANT, tempNAME)
}Getting data for rs9633535.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
Plotting region surrounding rs9633535 on 10:63584853-63921073.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Collecting posterior probabilities
Generating Plot
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Saving image for rs9633535.
Getting data for rs2854746.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
Plotting region surrounding rs2854746 on 7:45894617-46054070.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Collecting posterior probabilities
Generating Plot
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Saving image for rs2854746.
Getting data for rs3844006.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Getting LD data.
Plotting region surrounding rs3844006 on 6:131937915-132289374.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Collecting posterior probabilities
Generating Plot
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Saving image for rs3844006.
Note here that we plot the region, and not based on the lead variant of the EA-AA analyses.
library(RACER)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER_AA")),
dir.create(file.path(PROJECT_loc, "/RACER_AA")),
FALSE)[1] FALSE
RACER_AA_loc = paste0(PROJECT_loc,"/RACER_AA")
variants_of_interest <- c(variant_list$rsID)
variants_of_interest_fewgenes <- c("rs9349379",
"rs3844006", # throws an error which I don't understand immediately - could be that the variant is not present in AA 1000G data
"rs2854746", "rs4977575",
"rs10899970",
"rs9633535", # this one throws an LD error
"rs10762577",
"rs11063120", "rs9515203", "rs7182103",
"rs7412")
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,17]
tempEND <- subset(variant_list, rsID == VARIANT)[,18]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f = RACER::formatRACER(assoc_data = temp, chr_col = 3, pos_col = 4, p_col = 5)
# cat("\nGetting LD data.\n")
temp_f_ld = RACER::ldRACER(assoc_data = temp_f, rs_col = 2, pops = "AFR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- singlePlotRACER2(assoc_data = temp_f,
chr = tempCHR, build = "hg19",
plotby = "coord", snp_plot = VARIANT,
start_plot = tempSTART, end_plot = tempEND,
label_lead = FALSE, gene_track_h = 2, gene_name_s = 1.75)
print(p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.png"), plot = last_plot())
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.pdf"), plot = last_plot())
ggsave(filename = paste0(RACER_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_assoc.AA.eps"), plot = last_plot())
rm(temp, p1,
temp_f, temp_f_ld,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs9349379.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9349379...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9349379&pop=AFR&r2_d=r2&token=c0f613f149ab"
[16%] Downloaded 15980 bytes...
[33%] Downloaded 32210 bytes...
[50%] Downloaded 48576 bytes...
[100%] Downloaded 95751 bytes...
Merging input association data with LD...
Plotting region surrounding rs9349379 on 6:12403957-13403957.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs9349379.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs3844006.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs3844006...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs3844006&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 72560 bytes...
Merging input association data with LD...
Plotting region surrounding rs3844006 on 6:131595002-132595002.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs3844006.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs2854746.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs2854746...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs2854746&pop=AFR&r2_d=r2&token=c0f613f149ab"
[22%] Downloaded 15980 bytes...
[100%] Downloaded 71651 bytes...
Merging input association data with LD...
Plotting region surrounding rs2854746 on 7:45460645-46460645.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs2854746.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs4977575.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4977575...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4977575&pop=AFR&r2_d=r2&token=c0f613f149ab"
[17%] Downloaded 15980 bytes...
[100%] Downloaded 93249 bytes...
Merging input association data with LD...
Plotting region surrounding rs4977575 on 9:21624744-22624744.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs4977575.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10899970...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=AFR&r2_d=r2&token=c0f613f149ab"
[35%] Downloaded 64977 bytes...
[61%] Downloaded 114688 bytes...
[70%] Downloaded 131054 bytes...
[79%] Downloaded 147456 bytes...
[100%] Downloaded 185291 bytes...
Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45334720.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs10899970.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs9633535.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9633535...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9633535&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 42221 bytes...
Merging input association data with LD...
Plotting region surrounding rs9633535 on 10:63336088-64336088.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs9633535.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs10762577.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10762577...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=AFR&r2_d=r2&token=c0f613f149ab"
[78%] Downloaded 114111 bytes...
[90%] Downloaded 130513 bytes...
[100%] Downloaded 144558 bytes...
Merging input association data with LD...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs10762577.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs11063120.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs11063120...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs11063120&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 46036 bytes...
Merging input association data with LD...
Plotting region surrounding rs11063120 on 12:3986618-4986618.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs11063120.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs9515203.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9515203...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9515203&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 41921 bytes...
Merging input association data with LD...
Plotting region surrounding rs9515203 on 13:110549623-111549623.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs9515203.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs7182103.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7182103...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7182103&pop=AFR&r2_d=r2&token=c0f613f149ab"
[72%] Downloaded 81343 bytes...
[87%] Downloaded 97745 bytes...
[100%] Downloaded 111697 bytes...
Merging input association data with LD...
Plotting region surrounding rs7182103 on 15:78623946-79623946.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs7182103.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs7412.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7412...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=AFR&r2_d=r2&token=c0f613f149ab"
[48%] Downloaded 15980 bytes...
[100%] Downloaded 33190 bytes...
Merging input association data with LD...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Association Data Set is missing LD data, the resulting plot won't have LD information, but you can add it using the ldRACER.R function.
Plotting by...
coord
Reading in association data
Generating Plot
Saving image for rs7412.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
The idea behind the Approximate Bayes Factor (ABF) analysis is that the association of each trait with SNPs in a region may be summarised by a vector of 0s and at most a single 1, with the 1 indicating the causal SNP (so, assuming a single causal SNP for each trait).
The posterior probability of each possible configuration can be calculated and so, crucially, can the posterior probabilities that the traits share their configurations. This allows us to estimate the support for the following cases, i.e. hypotheses:
𝐻0: neither trait has a genetic association in the region 𝐻1: only trait 1 has a genetic association in the region 𝐻2: only trait 2 has a genetic association in the region 𝐻3: both traits are associated, but with different causal variants 𝐻4: both traits are associated and share a single causal variant
To what extent do the loci between European and African-American ancestries overlap? We are working on the assumption that the 11 loci are the loci and will test whether these overlap, i.e. colocalize.
Let’s make sure we have remotes and coloc
installed.
if(!require("remotes"))
install.packages("remotes") # if necessary
library(remotes)
install_github("chr1swallace/coloc@main",
build_vignettes = TRUE)Using github PAT from envvar GITHUB_PAT
Skipping install of 'coloc' from a github remote, the SHA1 (0b0701a5) has not changed since last install.
Use `force = TRUE` to force installation
library(coloc)This is coloc version 5.2.0
We can create mirror and scatter plot for each region.
library(RACER)
# Make directory for plots
ifelse(!dir.exists(file.path(PROJECT_loc, "/RACER_EA_vs_AA")),
dir.create(file.path(PROJECT_loc, "/RACER_EA_vs_AA")),
FALSE)[1] FALSE
RACER_EA_vs_AA_loc = paste0(PROJECT_loc,"/RACER_EA_vs_AA")
variants_of_interest <- c(variant_list$rsID)
# variants_of_interest_fewgenes <- c("rs9349379",
# "rs3844006", # throws an error which I don't understand immediately - could be that the variant is not present in AA 1000G data
# "rs2854746", "rs4977575",
# "rs10899970",
# "rs9633535",
# "rs10762577",
# "rs11063120", "rs9515203", "rs7182103",
# "rs7412")
variants_of_interest_fewgenes <- c("rs10762577",
"rs11063120", "rs9515203", "rs7182103",
"rs7412")
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,17]
tempEND <- subset(variant_list, rsID == VARIANT)[,18]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp1 <- subset(gwas_sumstats_racer_EA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
temp2 <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nFormatting association data.\n")
temp_f1 = RACER::formatRACER(assoc_data = temp1, chr_col = 3, pos_col = 4, p_col = 5)
temp_f2 = RACER::formatRACER(assoc_data = temp2, chr_col = 3, pos_col = 4, p_col = 5)
# cat("\nGetting LD data.\n")
temp_f_ld1 = RACER::ldRACER(assoc_data = temp_f1, rs_col = 2, pops = "EUR", lead_snp = VARIANT)
temp_f_ld2 = RACER::ldRACER(assoc_data = temp_f2, rs_col = 2, pops = "AFR", lead_snp = VARIANT)
cat(paste0("\nPlotting region surrounding ", VARIANT," on ",tempCHR,":",tempSTART,"-",tempEND,".\n"))
p1 <- mirrorPlotRACER(assoc_data1 = temp_f_ld1,
assoc_data2 = temp_f_ld2,
chr = tempCHR,
name1 = "European ancestry",
name2 = "African-American ancestry",
plotby = "coord",
start_plot = tempSTART, end_plot = tempEND)
print(p1)
p2 <- scatterPlotRACER(assoc_data1 = temp_f_ld1,
assoc_data2 = temp_f_ld2,
chr = tempCHR,
name1 = "European ancestry",
name2 = "African-American ancestry",
region_start = tempSTART,
region_end = tempEND,
ld_df = 1)
print(p2)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.png"), plot = p1)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.pdf"), plot = p1)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_mirror.EA_vs_AA.eps"), plot = p1)
cat(paste0("Saving image for ", VARIANT,".\n"))
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.png"), plot = p2)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.pdf"), plot = p2)
ggsave(filename = paste0(RACER_EA_vs_AA_loc, "/", tempVARIANTnr, ".", Today, ".",VARIANT,".regional_scatter.EA_vs_AA.eps"), plot = p2)
rm(temp1, temp2,
p1,p2,
temp_f1, temp_f_ld1,
temp_f2, temp_f_ld2,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs9349379.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9349379...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9349379&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 70967 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9349379...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9349379&pop=AFR&r2_d=r2&token=c0f613f149ab"
[50%] Downloaded 48576 bytes...
[57%] Downloaded 55496 bytes...
[100%] Downloaded 95751 bytes...
Merging input association data with LD...
Plotting region surrounding rs9349379 on 6:12403957-13403957.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs9349379.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs9349379.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs3844006.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs3844006...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs3844006&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 31016 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs3844006...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs3844006&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 72560 bytes...
Merging input association data with LD...
Plotting region surrounding rs3844006 on 6:131595002-132595002.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs3844006.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs3844006.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs2854746.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs2854746...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs2854746&pop=EUR&r2_d=r2&token=c0f613f149ab"
[80%] Downloaded 88263 bytes...
[95%] Downloaded 104629 bytes...
[100%] Downloaded 109298 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs2854746...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs2854746&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 71651 bytes...
Merging input association data with LD...
Plotting region surrounding rs2854746 on 7:45460645-46460645.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs2854746.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs2854746.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs4977575.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4977575...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4977575&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 26000 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs4977575...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs4977575&pop=AFR&r2_d=r2&token=c0f613f149ab"
[59%] Downloaded 55496 bytes...
[94%] Downloaded 88264 bytes...
[100%] Downloaded 93249 bytes...
Merging input association data with LD...
Plotting region surrounding rs4977575 on 9:21624744-22624744.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs4977575.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs4977575.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs10899970.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10899970...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=EUR&r2_d=r2&token=c0f613f149ab"
[51%] Downloaded 81920 bytes...
[61%] Downloaded 98286 bytes...
[71%] Downloaded 114688 bytes...
[81%] Downloaded 131054 bytes...
[100%] Downloaded 160476 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10899970...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10899970&pop=AFR&r2_d=r2&token=c0f613f149ab"
[8%] Downloaded 15979 bytes...
[30%] Downloaded 57344 bytes...
[39%] Downloaded 73710 bytes...
[75%] Downloaded 139246 bytes...
[84%] Downloaded 155648 bytes...
[100%] Downloaded 185291 bytes...
Merging input association data with LD...
Plotting region surrounding rs10899970 on 10:44015716-45334720.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs10899970.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs10899970.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs9633535.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9633535...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9633535&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 49363 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9633535...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9633535&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 42221 bytes...
Merging input association data with LD...
Plotting region surrounding rs9633535 on 10:63336088-64336088.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs9633535.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs9633535.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs10762577.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10762577...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 92997 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs10762577...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs10762577&pop=AFR&r2_d=r2&token=c0f613f149ab"
[56%] Downloaded 81920 bytes...
[100%] Downloaded 144558 bytes...
Merging input association data with LD...
Plotting region surrounding rs10762577 on 10:75417431-76417431.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs10762577.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs10762577.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs11063120.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs11063120...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs11063120&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 41883 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs11063120...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs11063120&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 46036 bytes...
Merging input association data with LD...
Plotting region surrounding rs11063120 on 12:3986618-4986618.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs11063120.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs11063120.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs9515203.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9515203...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9515203&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 30139 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs9515203...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs9515203&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 41921 bytes...
Merging input association data with LD...
Plotting region surrounding rs9515203 on 13:110549623-111549623.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs9515203.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs9515203.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs7182103.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7182103...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7182103&pop=EUR&r2_d=r2&token=c0f613f149ab"
[55%] Downloaded 65518 bytes...
[83%] Downloaded 98286 bytes...
[100%] Downloaded 118299 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7182103...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7182103&pop=AFR&r2_d=r2&token=c0f613f149ab"
[64%] Downloaded 71861 bytes...
[79%] Downloaded 88263 bytes...
[100%] Downloaded 111697 bytes...
Merging input association data with LD...
Plotting region surrounding rs7182103 on 15:78623946-79623946.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Reading in association data
Generating plot.
Saving image for rs7182103.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs7182103.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Getting data for rs7412.
Subset required data.
Formatting association data.
Formating association data...
Processing -log10(p-values)...
Preparing association data...
Formating association data...
Processing -log10(p-values)...
Preparing association data...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7412...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=EUR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 38865 bytes...
Merging input association data with LD...
All inputs are go!
Reading in association data...
Populations selected.
Calculating LD using rs7412...
[1] "https://ldlink.nci.nih.gov/LDlinkRest/ldproxy?var=rs7412&pop=AFR&r2_d=r2&token=c0f613f149ab"
[100%] Downloaded 33190 bytes...
Merging input association data with LD...
Plotting region surrounding rs7412 on 19:44912079-45912079.
Plotting by...
coord
Reading in association data
Generating plot.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
Warning: Removed 1 row(s) containing missing values (geom_path).
Warning: Removed 1 rows containing missing values (geom_text).
Reading in association data
Generating plot.
Saving image for rs7412.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving image for rs7412.
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
Saving 7.29 x 4.51 in image
We want to quantify the overlap.
Setting up an output directory for coloc.
ifelse(!dir.exists(file.path(PROJECT_loc, "/COLOC_EA_vs_AA")),
dir.create(file.path(PROJECT_loc, "/COLOC_EA_vs_AA")),
FALSE)[1] FALSE
COLOC_EA_vs_AA_loc = paste0(PROJECT_loc,"/COLOC_EA_vs_AA")Preparing data for coloc: we require beta and standard
errors for coloc.
First the European ancestry data, next the African-American ancestry data.
gwas_sumstats_EA <- fread(paste0(GWAS_loc,"/CAC1000G_EA_FINAL_FUMA.txt.gz"),
showProgress = TRUE)|--------------------------------------------------|
|==================================================|
names(gwas_sumstats_EA)[names(gwas_sumstats_EA) == "Pos"] <- "Position"
gwas_sumstats_racer_EA <- subset(gwas_sumstats_EA,
select = c("MarkerName", "rsID", "Chr", "Position",
"Effect", "StdErr",
"Pvalue",
"N"))
rm(gwas_sumstats_EA)gwas_sumstats_AA <- fread(paste0(GWAS_loc,"/CAC1000G_AA_FINAL_FUMA.filtered.txt.gz"),
showProgress = TRUE)|--------------------------------------------------|
|==================================================|
names(gwas_sumstats_AA)[names(gwas_sumstats_AA) == "Pos"] <- "Position"
gwas_sumstats_racer_AA <- subset(gwas_sumstats_AA,
select = c("MarkerName", "rsID", "Chr", "Position",
"Effect", "StdErr",
"Pvalue",
"N"))Error in `[.data.table`(x, r, vars, with = FALSE) :
column(s) not found: StdErr
Now we are reading to formally test the colocalization per trait.
Note that trait 1 = European ancestry; trait 2
= African-American ancestry.
for(VARIANT in variants_of_interest){
cat(paste0("Getting data for ", VARIANT,".\n"))
tempCHR <- subset(variant_list, rsID == VARIANT)[,5]
tempSTART <- subset(variant_list, rsID == VARIANT)[,17]
tempEND <- subset(variant_list, rsID == VARIANT)[,18]
tempVARIANTnr <- subset(variant_list, rsID == VARIANT)[,1]
cat("\nSubset required data.\n")
temp1 <- subset(gwas_sumstats_racer_EA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
temp2 <- subset(gwas_sumstats_racer_AA, Chr == tempCHR & (Position >= tempSTART & Position <= tempEND))
cat("\nCheck temp1 data.\n")
temp1 <- rename_with(temp1, tolower)
# correcting column names
temp1 <- rename(temp1, beta = effect)
temp1 <- rename(temp1, se = stderr)
temp1 <- rename(temp1, snp = rsid)
# calculating things
temp1$varbeta <- temp1$se^2
temp1 <- as.list(temp1) # critical, as coloc expects a list of variables
temp1$type <- "quant"
temp1$sdY <- 1
coloc::check_dataset(temp1, warn.minp = 1e-10)
cat("\nCheck temp2 data.\n")
temp2 <- rename_with(temp2, tolower)
# correcting column names
temp2 <- rename(temp2, beta = effect)
temp2 <- rename(temp2, se = stderr)
temp2 <- rename(temp2, snp = rsid)
# calculating things
temp2$varbeta <- temp2$se^2
temp2 <- as.list(temp2) # critical, as coloc expects a list of variables
temp2$type <- "quant"
temp2$sdY <- 1
coloc::check_dataset(temp2, warn.minp = 1e-10)
cat("\nPlot required data.\n")
plot_dataset(temp1)
plot_dataset(temp2)
res_temp1_vs_temp2_single <- coloc.abf(dataset1 = temp1,
dataset2 = temp2)
cat("\nColocalization.\n")
print(res_temp1_vs_temp2_single)
write_lines(res_temp1_vs_temp2_single, file = paste0(COLOC_EA_vs_AA_loc, "/res_EA_vs_AA_single.",
tempVARIANTnr,".",tempCHR,"_",tempSTART,"_",tempEND,".txt"))
coloc::sensitivity(res_temp1_vs_temp2_single, "H4 > 0.9")
# Step 1: Call the pdf command to start the plot
pdf(file = paste0(COLOC_EA_vs_AA_loc, "/res_EA_vs_AA_single.",
tempVARIANTnr,".",tempCHR,"_",tempSTART,"_",tempEND,".pdf")) # The directory you want to save the file in
# Step 2: Create the plot with R code
coloc::sensitivity(res_temp1_vs_temp2_single, "H4 > 0.9")
# Step 3: Run dev.off() to create the file!
dev.off()
rm(temp1, temp2,
# p1,p2,
# temp_f1, temp_f_ld1,
# temp_f2, temp_f_ld2,
tempCHR, tempSTART, tempEND,
VARIANT, tempVARIANTnr)
}Getting data for rs9349379.
Subset required data.
Check temp1 data.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 0.0010709
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 0.0010709
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
2.64e-21 4.40e-01 1.86e-22 3.04e-02 5.30e-01
[1] "PP abf for shared variant: 53%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7610836 0.1082 0.1082 0.01169642 0.01082
Posterior
nsnps H0 H1 H2 H3 H4
1.082000e+03 2.643499e-21 4.401108e-01 1.857062e-22 3.038835e-02 5.295009e-01
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs3844006.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 3.5548e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 9.5528e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset1, 1) :
minimum p value is: 3.5548e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 9.5528e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.21300 0.06910 0.02150 0.00628 0.69000
[1] "PP abf for shared variant: 69%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.8049601 0.0891 0.0891 0.0079299 0.00891
Posterior
nsnps H0 H1 H2 H3 H4
8.910000e+02 2.127262e-01 6.907437e-02 2.147238e-02 6.281855e-03 6.904452e-01
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs2854746.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 1.7731e-07
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 0.00072736
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 0.00072736
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.5990 0.1080 0.0686 0.0122 0.2120
[1] "PP abf for shared variant: 21.2%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7459854 0.1147 0.1147 0.01314462 0.01147
Posterior
nsnps H0 H1 H2 H3 H4
1.147000e+03 5.987696e-01 1.080300e-01 6.855817e-02 1.215678e-02 2.124855e-01
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs4977575.
Subset required data.
Check temp1 data.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 0.00077724
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 0.00077724
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
3.82e-38 4.04e-01 3.70e-39 3.86e-02 5.58e-01
[1] "PP abf for shared variant: 55.8%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7394555 0.1175 0.1175 0.0137945 0.01175
Posterior
nsnps H0 H1 H2 H3 H4
1.175000e+03 3.819485e-38 4.037077e-01 3.703500e-39 3.858714e-02 5.577051e-01
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs10899970.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 5.4509e-09
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 0.00014931
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 0.00014931
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.001320 0.848000 0.000167 0.107000 0.043600
[1] "PP abf for shared variant: 4.36%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.6284776 0.1641 0.1641 0.0269124 0.01641
Posterior
nsnps H0 H1 H2 H3 H4
1.641000e+03 1.319568e-03 8.476892e-01 1.669259e-04 1.071894e-01 4.363490e-02
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs9633535.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 3.7901e-07
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 1.7844e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 1.7844e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.1540 0.5240 0.0155 0.0527 0.2540
[1] "PP abf for shared variant: 25.4%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.8188193 0.083 0.083 0.0068807 0.0083
Posterior
nsnps H0 H1 H2 H3 H4
830.00000000 0.15368622 0.52399743 0.01553863 0.05272534 0.25405239
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs10762577.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 1.2432e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 0.00073363
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset1, 1) :
minimum p value is: 1.2432e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 0.00073363
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.06080 0.85200 0.00167 0.02340 0.06210
[1] "PP abf for shared variant: 6.21%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.9440809 0.0263 0.0263 0.00068906 0.00263
Posterior
nsnps H0 H1 H2 H3 H4
2.630000e+02 6.077064e-02 8.521307e-01 1.669798e-03 2.335196e-02 6.207694e-02
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs11063120.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 9.1265e-09
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 3.7022e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 3.7022e-05
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.00446 0.70700 0.00105 0.16600 0.12200
[1] "PP abf for shared variant: 12.2%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7992586 0.0916 0.0916 0.0083814 0.00916
Posterior
nsnps H0 H1 H2 H3 H4
9.160000e+02 4.464724e-03 7.069361e-01 1.047960e-03 1.658103e-01 1.217409e-01
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs9515203.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 3.1936e-07
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 0.0005347
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 0.0005347
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.1080 0.7390 0.0142 0.0972 0.0417
[1] "PP abf for shared variant: 4.17%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.686176 0.1401 0.1401 0.019614 0.01401
Posterior
nsnps H0 H1 H2 H3 H4
1.401000e+03 1.078718e-01 7.390042e-01 1.419994e-02 9.723877e-02 4.168536e-02
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs7182103.
Subset required data.
Check temp1 data.
Warning in coloc::check_dataset(temp1, warn.minp = 1e-10) :
minimum p value is: 1.9223e-09
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 5.9197e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 5.9197e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
9.93e-05 8.11e-01 1.82e-05 1.49e-01 3.97e-02
[1] "PP abf for shared variant: 3.97%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7836873 0.0984 0.0984 0.00967272 0.00984
Posterior
nsnps H0 H1 H2 H3 H4
9.840000e+02 9.928622e-05 8.113958e-01 1.820615e-05 1.487462e-01 3.974052e-02
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
Getting data for rs7412.
Subset required data.
Check temp1 data.
Check temp2 data.
Warning in coloc::check_dataset(temp2, warn.minp = 1e-10) :
minimum p value is: 2.9412e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
Plot required data.
Warning in check_dataset(d = dataset2, 2) :
minimum p value is: 2.9412e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf
0.01090 0.64900 0.00402 0.23800 0.09770
[1] "PP abf for shared variant: 9.77%"
Colocalization.
Coloc analysis of trait 1, trait 2
SNP Priors
p1 p2 p12
1e-04 1e-04 1e-05
Hypothesis Priors
H0 H1 H2 H3 H4
0.7896519 0.0958 0.0958 0.00916806 0.00958
Posterior
nsnps H0 H1 H2 H3 H4
9.580000e+02 1.094517e-02 6.491241e-01 4.018346e-03 2.382180e-01 9.769439e-02
Results fail decision rule H4 > 0.9
Results fail decision rule H4 > 0.9
ENPP1/ENPP3 locus
File: res_EA_vs_AA_single.1.6_131595002_132595002.txt
c(nsnps = 891, PP.H0.abf = 0.212726180436945, PP.H1.abf = 0.0690743673677815, PP.H2.abf = 0.0214723764600455, PP.H3.abf = 0.00628185511784578, PP.H4.abf = 0.690445220617383)
Conclusion: 69.0% probability that the locus is shared between both ancestries and includes the same causal variant.
IGFBP3 locus
File: res_EA_vs_AA_single.2.7_45460645_46460645.txt
c(nsnps = 1147, PP.H0.abf = 0.598769555553722, PP.H1.abf = 0.108030020950978, PP.H2.abf = 0.0685581712636257, PP.H3.abf = 0.0121567818199419, PP.H4.abf = 0.212485470411732)
Conclusion: 59.9% probability that the locus is not associated in either ancestry, but 10.8 that it is European-specific, and 21.2% that it is shared between both ancestries and includes the same causal variant.
CXCL12 locus
File: res_EA_vs_AA_single.3.10_44015716_45334720.txt
c(nsnps = 1641, PP.H0.abf = 0.00131956785769608, PP.H1.abf = 0.847689205708678, PP.H2.abf = 0.000166925872986017, PP.H3.abf = 0.107189395862512, PP.H4.abf = 0.0436349046981281)
Conclusion: 84.8% probability that the locus is European-specific, but 4.3-10.7% probability that the locus is shared between both ancestries.
ARID5B locus
File: res_EA_vs_AA_single.4.10_63336088_64336088.txt
c(nsnps = 830, PP.H0.abf = 0.153686217856268, PP.H1.abf = 0.52399742579072, PP.H2.abf = 0.0155386299541433, PP.H3.abf = 0.0527253377600635, PP.H4.abf = 0.254052388638805)
Conclusion: 52.4% probability that the locus is European-specific, but 25.4% probability that the locus is shared between both ancestries and includes different causal variants.
ADK locus
File: res_EA_vs_AA_single.5.10_75417431_76417431.txt
c(nsnps = 263, PP.H0.abf = 0.0607706367611815, PP.H1.abf = 0.85213066483438, PP.H2.abf = 0.00166979761323014, PP.H3.abf = 0.0233519569734565, PP.H4.abf = 0.0620769438177522)
Conclusion: 85.2% probability that the locus is European-specific, but 2.3-6.2% probability that the locus is shared between both ancestries.
FGF23 locus
File: res_EA_vs_AA_single.6.12_3986618_4986618.txt
c(nsnps = 916, PP.H0.abf = 0.00446472426354749, PP.H1.abf = 0.706936091086602, PP.H2.abf = 0.00104796032447378, PP.H3.abf = 0.165810337258493, PP.H4.abf = 0.121740887066883)
Conclusion: 70.7% probability that the locus is European-specific, but 12.2-16.6% probability that the locus is shared between both ancestries.
COL4A1/COL4A2 locus
File: res_EA_vs_AA_single.7.13_110549623_111549623.txt
c(nsnps = 1401, PP.H0.abf = 0.10787176608552, PP.H1.abf = 0.739004164794562, PP.H2.abf = 0.0141999398787617, PP.H3.abf = 0.0972387717170162, PP.H4.abf = 0.0416853575241402)
Conclusion: 73.9% probability that the locus is European-specific, but 4.1-9.7% probability that the locus is shared between both ancestries.
MORF4L locus
File: res_EA_vs_AA_single.8.15_78623946_79623946.txt
c(nsnps = 984, PP.H0.abf = 9.92862156724414e-05, PP.H1.abf = 0.811395804353311, PP.H2.abf = 1.82061468585998e-05, PP.H3.abf = 0.148746181818894, PP.H4.abf = 0.0397405214652622)
Conclusion: 81.1% probability that the locus is European-specific, but 14.9% probability that the locus is shared between both ancestries and includes different causal variants.
PHACTR1 locus
File: res_EA_vs_AA_single.9.6_12403957_13403957.txt
c(nsnps = 1082, PP.H0.abf = 2.64349916808974e-21, PP.H1.abf = 0.440110764390371, PP.H2.abf = 1.85706249346447e-22, PP.H3.abf = 0.0303883523691947, PP.H4.abf = 0.529500883240437)
Conclusion: 44.0% probability that the locus is European-specific, but 52.9% probability that the locus is shared between both ancestries and includes the same causal variant.
CDKN2A/CDKN2B locus
File: res_EA_vs_AA_single.10.9_21624744_22624744.txt
c(nsnps = 1175, PP.H0.abf = 3.8194848005248e-38, PP.H1.abf = 0.40370772152903, PP.H2.abf = 3.70349960775562e-39, PP.H3.abf = 0.0385871394254695, PP.H4.abf = 0.557705139045494)
Conclusion: 40.4% probability that the locus is European-specific, but 55.8% probability that the locus is shared between both ancestries and includes the same causal variant.
APOE locus
File: res_EA_vs_AA_single.11.19_44912079_45912079.txt
c(nsnps = 958, PP.H0.abf = 0.0109451663563229, PP.H1.abf = 0.649124092491782, PP.H2.abf = 0.00401834569685491, PP.H3.abf = 0.238218007619045, PP.H4.abf = 0.0976943878359947)
Conclusion: 64.9% probability that the locus is European-specific, but 23.8% probability that the locus is shared between both ancestries and includes different causal variants.
Overarching conclusions:
Version: v1.4.2
Last update: 2022-06-28
Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
Description: Script to create plot regional association plots.
Minimum requirements: R version 3.4.3 (2017-06-30) -- 'Single Candle', Mac OS X El Capitan
Changes log
* v1.4.2 Added formal quantification of colocalization between ancestries.
* v1.4.1 Added mirror plots and scatter plots.
* v1.4.0 Update with AA data.
* v1.3.0 Added the credible sets to the aditional regions.
* v1.2.0 Added in aditional regions.
* v1.1.0 Created PNG and PDF of top loci regions.
* v1.0.0 Initial version.
sessionInfo()save.image(paste0(PROJECT_loc, "/",Today,".",PROJECTNAME,".RegionalAssociationPlots.RData"))| © 1979-2022 Sander W. van der Laan | s.w.vanderlaan[at]gmail.com | swvanderlaan.github.io. |